suppressMessages(library(Seurat))
suppressMessages(library(ggplot2))
suppressMessages(library(ggrepel))
suppressMessages(library(msigdbr))
suppressMessages(library(fgsea))
suppressMessages(library(dplyr))
# This DEA was performed using scStudio: https://compbio.imm.medicina.ulisboa.pt/app/scStudio
# scStudio employs the FindMarkers function available from the Seurat package
# Default parameters were used except for: test.use = "MAST" and logfc.threshold = -Inf
dea <- readRDS("/mnt/nmorais-nfs/marta/shiny/scstudio/tokens/karine/dea.rds")
markers_c3 <- dea$r0.4_3vsOthers_MAST
markers_c3$neutral_rank <- abs(markers_c3$`Log2FC (mean)`)
markers_c3 <- markers_c3[order(markers_c3$neutral_rank, decreasing = TRUE),]
markers_c3
markers_c1 <- dea$r0.4_1vsOthers_MAST
markers_c1$neutral_rank <- abs(markers_c1$`Log2FC (mean)`)
markers_c1 <- markers_c1[order(markers_c1$neutral_rank, decreasing = TRUE),]
markers_c1
rank_c3 <- as.data.frame(cbind(markers_c3$Gene, markers_c3$neutral_rank))
colnames(rank_c3) <- c("genes", "log2FC")
nrank_c3 <- as.numeric(as.character(rank_c3[,2]))
names(nrank_c3) <- rank_c3$genes
get_categories <- msigdbr_collections()
hallmarks <- msigdbr(species = "Mus musculus", category = "H")
hallmarks <- hallmarks %>% split(x = .$gene_symbol, f = .$gs_name)
gsea_result_c3 <- fgsea(pathways = hallmarks, eps = 0, stats = nrank_c3)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (86.96% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
results_tidy_c3 <- gsea_result_c3 %>% as_tibble() %>% arrange(desc(NES))
results_tidy_c3
rank_c1 <- as.data.frame(cbind(markers_c1$Gene, markers_c1$neutral_rank))
colnames(rank_c1) <- c("genes", "log2FC")
nrank_c1 <- as.numeric(as.character(rank_c1[,2]))
names(nrank_c1) <- rank_c1$genes
get_categories <- msigdbr_collections()
hallmarks <- msigdbr(species = "Mus musculus", category = "H")
hallmarks <- hallmarks %>% split(x = .$gene_symbol, f = .$gs_name)
gsea_result_c1 <- fgsea(pathways = hallmarks, eps = 0, stats = nrank_c1)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (81.29% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
results_tidy_c1 <- gsea_result_c1 %>% as_tibble() %>% arrange(desc(NES))
results_tidy_c1
plotEnrichment(hallmarks[["HALLMARK_INTERFERON_GAMMA_RESPONSE"]],
nrank_c3 ) + labs(title = "C3 - HALLMARK_INTERFERON_GAMMA_RESPONSE")

plotEnrichment(hallmarks[["HALLMARK_INTERFERON_GAMMA_RESPONSE"]],
nrank_c1 ) + labs(title = "C1 - HALLMARK_INTERFERON_GAMMA_RESPONSE")

leading_edge_c3 <- results_tidy_c3[results_tidy_c3$pathway == "HALLMARK_INTERFERON_GAMMA_RESPONSE",]$leadingEdge[[1]]
leading_edge_c3
## [1] "Ccl5" "Cfb" "H2-Q7" "Ifitm2" "Bst2" "Ptgs2"
## [7] "Rsad2" "Cd274" "Sod2" "Oasl1" "B2m" "Cd38"
## [13] "Isg20" "Slamf7" "Tnfaip3" "Xaf1" "Lgals3bp" "Il18bp"
## [19] "Herc6" "Arid5b" "Hif1a" "H2-D1" "Il15ra" "Irf7"
## [25] "Rnf213" "Ddx60" "Cd40" "Vcam1" "Peli1" "Il10ra"
## [31] "Stat1" "H2-Aa" "Icam1" "Rtp4" "H2-Eb1" "Psme2"
## [37] "Cxcl9" "Trim25" "Gbp4" "Stat3" "Ifit1bl1" "Tapbp"
## [43] "Psmb9" "Psme1" "Pla2g4a" "Cd86" "Ube2l6" "Nampt"
## [49] "H2-DMa" "St3gal5" "Nfkbia" "Ifih1" "Cdkn1a" "Nfkb1"
## [55] "Ccl2" "Tnfaip2" "Btg1" "Zbp1" "Irf8" "Psmb8"
## [61] "Ptpn2" "Itgb7" "Irf1" "Lcp2" "Parp14" "Stat2"
## [67] "Mvp" "Helz2" "Psmb10" "Tap1" "Eif4e3" "Myd88"
## [73] "Csf2rb2" "Fcgr1" "Il15" "Usp18" "Irf2" "Vamp8"
## [79] "Pml" "Pnpt1" "Txnip" "Gbp8" "Psmb2" "Ccl7"
## [85] "Ogfr" "Sp110" "Irf5" "Fas" "Samd9l" "Socs1"
## [91] "Cfh" "Casp7" "Cd74" "Gch1" "Sri" "Batf2"
## [97] "Ifi30" "Gbp3" "Isg15" "Sppl2a" "Ifi35" "Ifi44"
## [103] "Lap3" "Ptpn6" "Fgl2" "Parp12" "Trafd1" "Ifi27"
## [109] "Ly6e" "Pde4b" "Isoc1" "Ripk2" "Socs3" "H2-M3"
## [115] "Tnfaip6"
leading_edge_c1 <- results_tidy_c1[results_tidy_c1$pathway == "HALLMARK_INTERFERON_GAMMA_RESPONSE",]$leadingEdge[[1]]
leading_edge_c1
## [1] "H2-Aa" "H2-Eb1" "Isg15" "Cd74" "Cxcl10" "Rsad2"
## [7] "Ifit3" "H2-Q7" "Ccl5" "Ifit2" "Fcgr1" "Fgl2"
## [13] "Samhd1" "Zbp1" "Ifitm3" "Ifitm2" "Sp110" "Irf7"
## [19] "Rnf213" "Cmpk2" "Ifi35" "Ifih1" "Epsti1" "Isg20"
## [25] "Oas3" "Casp1" "Ccl2" "Trafd1" "Nmi" "Il15"
## [31] "Gch1" "Sppl2a" "Gbp3" "Psmb8" "Rtp4" "Ogfr"
## [37] "Irf5" "Tap1" "Parp14" "Ube2l6" "Ly6e" "Itgb7"
## [43] "Samd9l" "Xaf1" "Parp12" "Psmb10" "Psmb9" "Usp18"
## [49] "Herc6" "Dhx58" "Casp4" "Socs3" "Tapbp" "Casp3"
## [55] "Pml" "Jak2" "Tnfaip3" "Casp8" "Znfx1" "Irf2"
## [61] "Stat2" "Pfkp" "Helz2" "Bst2" "Nampt" "Ptpn1"
## [67] "Trim25" "Oasl1" "Ptgs2" "Cfb" "Psme2" "Psma3"
## [73] "Myd88" "Eif2ak2" "Pim1" "Slamf7" "Sri" "Stat1"
## [79] "Tnfaip2" "Psma2" "Oas2" "Gbp9" "Klrk1" "Psme1"
## [85] "Mvp" "Cd86" "Nlrc5" "Il10ra" "Cfh" "Ifit1bl1"
## [91] "Nfkb1" "Pnpt1" "Il18bp" "B2m" "Ccl7" "Cd38"
## [97] "Tdrd7" "Nfkbia" "Cd40" "Lcp2" "Il4ra" "Isoc1"
## [103] "St3gal5" "Tnfaip6" "Vcam1" "Psmb2" "Gbp8" "Hif1a"
## [109] "Cxcl9" "St8sia4" "Irf8" "Eif4e3" "Trim26" "Nod1"
## [115] "Tnfsf10" "H2-DMa" "Lgals3bp"
table(leading_edge_c3 %in% leading_edge_c1)
##
## FALSE TRUE
## 30 85
table(leading_edge_c1 %in% leading_edge_c3)
##
## FALSE TRUE
## 32 85
unique_genes <- unique(c(leading_edge_c3, leading_edge_c1))
rownames(markers_c3) <- markers_c3$Gene
rownames(markers_c1) <- markers_c1$Gene
df <- data.frame(Genes = unique_genes,
Rank_c3 = markers_c3[unique_genes,]$`Log2FC (mean)`,
Rank_c1 = markers_c1[unique_genes,]$`Log2FC (mean)`)
df
gene_label <- rep("Common", length(unique_genes))
gene_label[df$Genes %in% leading_edge_c3 & !(df$Genes %in% leading_edge_c1)] <- "Cluster 3"
gene_label[df$Genes %in% leading_edge_c1 & !(df$Genes %in% leading_edge_c3)] <- "Cluster 1"
table(gene_label)
## gene_label
## Cluster 1 Cluster 3 Common
## 32 30 85
df$gene_label <- gene_label
ggplot(df) +
geom_point( aes(x=Rank_c3, y=Rank_c1, col = gene_label)) +
geom_label_repel(aes(x=Rank_c3, y=Rank_c1, label = Genes), size = 5,
max.overlaps = 10) +
scale_color_manual(values=c("red","orange", "grey45"), name = "Leading edge genes") +
ggtitle("Hallmark: Interferon gamma response") +
xlab("C3 rank") +
ylab("C1 rank") +
theme( text = element_text(size=20)) +
geom_hline(yintercept=0, linetype="dashed",
color = "blue", size=0.3) +
geom_vline(xintercept=0, linetype="dashed",
color = "blue", size=0.3) +
coord_cartesian(ylim = c(-1, 2), xlim = c(-1,5), expand = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 119 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cxcl9 <- rep("", nrow(df))
cxcl9[df$Genes == "Cxcl9"] <- "Cxcl9"
genes <- df$Genes
genes[genes == "Il18bp"] <- ""
ggplot(df) +
geom_point( aes(x=Rank_c3, y=Rank_c1, col = gene_label), size = 6) +
geom_label_repel(aes(x=Rank_c3, y=Rank_c1, label = genes), size = 20, max.overlaps = 10) +
geom_label_repel(aes(x=Rank_c3, y=Rank_c1, label = cxcl9), size = 20, force = 15) +
scale_color_manual(values=c("magenta","red", "grey45"), name = "Leading edge genes") +
ggtitle("Hallmark: Interferon gamma response") +
xlab("C3 rank") +
ylab("C1 rank") +
theme( text = element_text(size=70)) +
geom_hline(yintercept=0, linetype="dashed",
color = "blue", size=0.3) +
geom_vline(xintercept=0, linetype="dashed",
color = "blue", size=0.3) +
coord_cartesian(ylim = c(-1, 2), xlim = c(-1,2), expand = FALSE)
## Warning: ggrepel: 99 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

write.table(df, "/mnt/nmorais-nfs/marta/pA_karine/r_session/integrated_analysis_revised/paper-figures/S4M-IFN-y.txt", row.names = FALSE, quote = FALSE)
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dplyr_1.1.4 fgsea_1.20.0 msigdbr_7.5.1 ggrepel_0.9.5
## [5] ggplot2_3.4.2 SeuratObject_4.1.3 Seurat_4.1.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.16 colorspace_2.1-0 deldir_1.0-6
## [4] ellipsis_0.3.2 ggridges_0.5.6 rstudioapi_0.15.0
## [7] spatstat.data_3.0-0 farver_2.1.1 leiden_0.4.3
## [10] listenv_0.9.0 fansi_1.0.6 codetools_0.2-18
## [13] splines_4.1.2 cachem_1.0.8 knitr_1.45
## [16] polyclip_1.10-4 jsonlite_1.8.8 ica_1.0-3
## [19] cluster_2.1.4 png_0.1-8 uwot_0.1.14
## [22] shiny_1.8.0 sctransform_0.3.5 spatstat.sparse_3.0-0
## [25] compiler_4.1.2 httr_1.4.4 Matrix_1.5-3
## [28] fastmap_1.1.1 lazyeval_0.2.2 cli_3.6.2
## [31] later_1.3.2 htmltools_0.5.7 tools_4.1.2
## [34] igraph_1.3.5 gtable_0.3.4 glue_1.7.0
## [37] RANN_2.6.1 reshape2_1.4.4 fastmatch_1.1-3
## [40] Rcpp_1.0.12 scattermore_0.8 jquerylib_0.1.4
## [43] vctrs_0.6.5 babelgene_22.9 nlme_3.1-162
## [46] progressr_0.13.0 lmtest_0.9-40 spatstat.random_3.1-3
## [49] xfun_0.42 stringr_1.5.1 globals_0.16.2
## [52] mime_0.12 miniUI_0.1.1.1 lifecycle_1.0.4
## [55] irlba_2.3.5.1 goftest_1.2-3 future_1.31.0
## [58] MASS_7.3-58.2 zoo_1.8-12 scales_1.3.0
## [61] spatstat.core_2.4-4 promises_1.2.1 spatstat.utils_3.0-1
## [64] parallel_4.1.2 RColorBrewer_1.1-3 yaml_2.3.8
## [67] reticulate_1.26 pbapply_1.7-0 gridExtra_2.3
## [70] sass_0.4.8 rpart_4.1.19 stringi_1.8.3
## [73] highr_0.10 BiocParallel_1.28.3 rlang_1.1.3
## [76] pkgconfig_2.0.3 matrixStats_0.63.0 evaluate_0.23
## [79] lattice_0.20-45 ROCR_1.0-11 purrr_1.0.2
## [82] tensor_1.5 labeling_0.4.3 patchwork_1.2.0
## [85] htmlwidgets_1.6.4 cowplot_1.1.1 tidyselect_1.2.0
## [88] parallelly_1.34.0 RcppAnnoy_0.0.20 plyr_1.8.9
## [91] magrittr_2.0.3 R6_2.5.1 generics_0.1.3
## [94] withr_3.0.0 mgcv_1.8-41 pillar_1.9.0
## [97] fitdistrplus_1.1-8 survival_3.5-0 abind_1.4-5
## [100] sp_1.6-0 tibble_3.2.1 future.apply_1.10.0
## [103] crayon_1.5.2 KernSmooth_2.23-20 utf8_1.2.4
## [106] spatstat.geom_3.0-6 plotly_4.10.1 rmarkdown_2.26
## [109] grid_4.1.2 data.table_1.15.2 digest_0.6.34
## [112] xtable_1.8-4 tidyr_1.3.1 httpuv_1.6.14
## [115] munsell_0.5.0 viridisLite_0.4.2 bslib_0.6.1